if (!require("pacman"))
install.packages("pacman")
pacman::p_load(tidyverse,
janitor,
scales,
stringr,
ggthemes,
ggrepel,
patchwork,
grid)
# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 20))
# set width of code output
options(width = 65)
# set figure parameters for knitr
knitr::opts_chunk$set(
fig.width = 11, # 7" width
fig.asp = 0.618, # the golden ratio
fig.retina = 3, # dpi multiplier for displaying HTML output on retina
fig.align = "center", # center align figures
dpi = 300 # higher dpi, sharper image
)Q1 Solution
tuesdata <- tidytuesdayR::tt_load('2025-01-21')
exped_tidy <- tuesdata$exped_tidy
peaks_tidy <- tuesdata$peaks_tidy
exped_tidy_cleaned <- clean_names(exped_tidy)
peaks_tidy_cleaned <- clean_names(peaks_tidy)
pivoted_data <- exped_tidy_cleaned |>
pivot_longer(
cols = c(route1, route2, route3, route4, success1, success2, success3, success4),
names_to = c(".value", "index"),
names_pattern = "(route|success)(\\d+)"
)
pivoted_data <- pivoted_data |>
filter(!is.na(route))
pivoted_data <- pivoted_data |>
mutate(route = str_replace_all(route, "S Col SE Ridge", "S Col-SE Ridge"))
summary_data <- pivoted_data |>
group_by(peakid) |>
summarize(
attempts = n(),
success_rate = (mean(success, na.rm = TRUE) * 100), # This remains NUMERIC for fill aesthetic
.groups = 'drop' # Good practice to drop grouping after summarize
) |>
arrange(desc(attempts)) |>
slice_head(n = 4)
# Create a SEPARATE column for the label, formatted as a character string
summary_data$success_rate_label <- sprintf("%.1f%%", summary_data$success_rate)
# Join with peaks_tidy_cleaned to get peak names, keeping original peakid
summary_data <- summary_data |>
left_join(select(peaks_tidy_cleaned, peakid, pkname), by = "peakid")
ggplot(summary_data, aes(x = reorder(pkname, -attempts), y = attempts, fill = success_rate)) +
geom_col() +
geom_text(aes(label = success_rate_label), # Use the new character column for labels
vjust = 1, # Adjust vertical position if needed
nudge_y = -3,
color = "black") + # Choose a good color for readability
scale_fill_viridis_c(
option = "viridis", # Corrected 'options' to 'option'
name = "Success Rate (%)",
limits = c(0, 100),
labels = function(x) paste0(x, "%")
) +
coord_cartesian(ylim = c(0, 200)) +
labs(
x = "Peaks",
y = "Number of Attempts",
title = "Top 4 Peaks by Number of Attempts by all Nations",
subtitle = "Bar fill indicates success rate (%)",
caption = "Source: https://github.com/rfordatascience/tidytuesday"
) +
theme_minimal(base_size = 14)selected_peaks <- c("EVER", "AMAD", "LHOT", "MANA", "HIML")
pivoted_data_cleaned <- pivoted_data |>
filter(!is.na(route) & !is.na(nation))
summary_data_with_combined_key <- pivoted_data_cleaned |>
group_by(peakid, route, nation) |>
summarize(
attempts_on_route_per_nation = n(),
success_rate_on_route = (mean(success, na.rm = TRUE) * 100),
.groups = 'drop'
) |>
mutate(peakid_route = paste(peakid, route, sep = " - ")) |>
arrange(peakid, route, desc(attempts_on_route_per_nation))
ever_df <- summary_data_with_combined_key |>
filter(peakid == 'EVER')
# Calculate total attempts per nation across all routes
total_attempts <- ever_df |>
group_by(nation)|>
summarise(total_attempts = sum(attempts_on_route_per_nation)) |>
slice_max(order_by = total_attempts, n = 3)
# Get the top 3 nations
top_nations <- total_attempts$nation
# Filter the dataframe to include only the top 3 nations
ever_top3 <- ever_df |>
filter(nation %in% top_nations)
# Create the bubble chart
p1 <- ggplot(ever_top3, aes(x = route, y = success_rate_on_route)) +
geom_point(aes(color = attempts_on_route_per_nation, size = attempts_on_route_per_nation),
alpha = 0.7,
position = "identity") +
geom_text_repel(data = subset(ever_top3, attempts_on_route_per_nation >= 1),
aes(label = nation, color = attempts_on_route_per_nation),
size = 5,
box.padding = 0.7,
point.padding = 0.6,
min.segment.length = Inf) +
scale_color_viridis_c(option = "turbo",
name = "Attempts on Route",
breaks = c(1, 10, 20, 30, 40),
limits = c(0, 40),
guide = "none") +
scale_size_continuous(range = c(3, 15),
name = "Attempts on Route",
breaks = c(10, 20, 30),
limits = c(0, 40),
guide = "none")+
annotate("text", y = 125, x = 0.7, label = "Everest", size = 5, fontface = "bold") +
labs(x = NULL,
y = "Success Rate (%)") +
scale_y_continuous(breaks = seq(0, 100, by = 25)) +
scale_x_discrete(labels = label_wrap(10)) +
coord_cartesian(ylim = c(-5, 125)) +
theme_minimal() +
theme(
axis.text.x = element_text(hjust = 0.5, size = 9),
legend.position = "none"
)amad_df <- summary_data_with_combined_key |>
filter(peakid == 'AMAD')
# Calculate total attempts per nation across all routes
total_attempts <- amad_df |>
group_by(nation)|>
summarise(total_attempts = sum(attempts_on_route_per_nation)) |>
slice_max(order_by = total_attempts, n = 3)
# Get the top 3 nations
top_nations <- total_attempts$nation
# Filter the dataframe to include only the top 3 nations
amad_top3 <- amad_df |>
filter(nation %in% top_nations)
print(amad_top3)# A tibble: 5 × 6
peakid route nation attempts_on_route_per_nation
<chr> <chr> <chr> <int>
1 AMAD N Ridge USA 1
2 AMAD SW Ridge USA 23
3 AMAD SW Ridge UK 21
4 AMAD SW Ridge Nepal 11
5 AMAD W Face Nepal 1
# ℹ 2 more variables: success_rate_on_route <dbl>,
# peakid_route <chr>
# Create the bubble chart
p2 <- ggplot(amad_top3, aes(x = route, y = success_rate_on_route)) +
geom_point(aes(color = attempts_on_route_per_nation, size = attempts_on_route_per_nation),
alpha = 0.7,
position = position_jitter(width = 0)) +
geom_text_repel(data = subset(amad_top3, attempts_on_route_per_nation >= 1),
aes(label = nation, color = attempts_on_route_per_nation),
size = 5,
box.padding = 0.6,
point.padding = 0.8,
min.segment.length = Inf) +
scale_color_viridis_c(option = "turbo",
name = "Attempts on Route",
breaks = c(1, 10, 20, 30, 40),
limits = c(0, 40),
guide = "none") +
scale_size_continuous(range = c(3, 15),
name = "Attempts on Route",
breaks = c(10, 20, 30),
limits = c(0, 40),
guide = "none") +
annotate("text", y = 120, x = 0.9, label = "Ama Dablam", size = 5, fontface = "bold") +
labs(x = NULL,
y = NULL) +
scale_y_continuous(breaks = seq(0, 100, by = 25)) + # Adjusted seq start to 0 for clarity
scale_x_discrete(labels = label_wrap(10)) +
coord_cartesian(ylim = c(-5, 120)) +
theme_minimal() +
theme(
axis.text.x = element_text(hjust = 0.5, size = 9),
legend.position = "none"
)lhot_df <- summary_data_with_combined_key |>
filter(peakid == 'LHOT')
# Calculate total attempts per nation across all routes
total_attempts <- lhot_df |>
group_by(nation)|>
summarise(total_attempts = sum(attempts_on_route_per_nation)) |>
slice_max(order_by = total_attempts, n = 3)
# Get the top 3 nations
top_nations <- total_attempts$nation
# Filter the dataframe to include only the top 3 nations
lhot_top3 <- lhot_df |>
filter(nation %in% top_nations)
print(lhot_top3)# A tibble: 6 × 6
peakid route nation attempts_on_route_per_nation
<chr> <chr> <chr> <int>
1 LHOT S Col-W Face USA 11
2 LHOT S Col-W Face India 7
3 LHOT S Col-W Face Nepal 6
4 LHOT W Face Nepal 11
5 LHOT W Face India 10
6 LHOT W Face USA 9
# ℹ 2 more variables: success_rate_on_route <dbl>,
# peakid_route <chr>
# Create the bubble chart
p3 <- ggplot(lhot_top3, aes(x = route, y = success_rate_on_route)) +
geom_point(aes(color = attempts_on_route_per_nation, size = attempts_on_route_per_nation),
alpha = 0.6,
position = "identity") +
geom_text_repel(data = subset(lhot_top3, attempts_on_route_per_nation >= 1),
aes(label = nation, color = attempts_on_route_per_nation),
size = 5,
box.padding = 0.6,
point.padding = 0.8,
min.segment.length = Inf,
position = "identity") +
scale_color_viridis_c(option = "turbo",
name = "Attempts on Route",
breaks = c(1, 10, 20, 30, 40),
limits = c(0, 40),
guide = "none") +
scale_size_continuous(range = c(3, 15),
name = "Attempts on Route",
breaks = c(10, 20, 30),
limits = c(0, 40),
guide = "none")+
annotate("text", y = 130, x = 0.6, label = "Lhotse", size = 5, fontface = "bold") +
labs(x = "Route",
y = "Success Rate (%)") +
scale_y_continuous(breaks = seq(0, 100, by = 25)) +
scale_x_discrete(labels = label_wrap(10)) +
coord_cartesian(ylim = c(-5, 130)) +
theme_minimal() +
theme(
axis.text.x = element_text(hjust = 0.5, size = 9),
legend.position = "none"
)mana_df <- summary_data_with_combined_key |>
filter(peakid == 'MANA')
# Calculate total attempts per nation across all routes
total_attempts <- mana_df |>
group_by(nation)|>
summarise(total_attempts = sum(attempts_on_route_per_nation)) |>
slice_max(order_by = total_attempts, n = 3)
# Get the top 3 nations
top_nations <- total_attempts$nation
# Filter the dataframe to include only the top 3 nations
mana_top3 <- mana_df |>
filter(nation %in% top_nations)
print(mana_top3)# A tibble: 4 × 6
peakid route nation attempts_on_route_per_nation
<chr> <chr> <chr> <int>
1 MANA NE Face Nepal 20
2 MANA NE Face USA 10
3 MANA NE Face China 6
4 MANA NE Face UK 6
# ℹ 2 more variables: success_rate_on_route <dbl>,
# peakid_route <chr>
# Create the bubble chart
p4 <- ggplot(mana_top3, aes(x = route, y = success_rate_on_route)) +
geom_point(aes(color = attempts_on_route_per_nation, size = attempts_on_route_per_nation),
alpha = 0.7,
position = "identity") +
geom_text_repel(data = subset(mana_top3, attempts_on_route_per_nation >= 1),
aes(label = nation, color = attempts_on_route_per_nation),
size = 5,
box.padding = 0.6,
point.padding = 0.8,
min.segment.length = Inf,
position = "identity") +
scale_color_viridis_c(option = "turbo",
name = "\n",
breaks = c(1, 10, 20, 30, 40),
limits = c(0, 40),
guide = guide_colorbar(direction = "horizontal", title.position = "top")) +
scale_size_continuous(range = c(3, 15),
name = " Attempts on route metrix (size + color)",
breaks = c(1, 10, 20, 30, 40),
limits = c(0, 40),
guide = guide_legend(title.position = "top"))+
annotate("text", y = 120, x = 0.55, label = "Manaslu", size = 5, fontface = "bold") +
labs (x = "Route",
y = NULL) +
scale_y_continuous(breaks = seq(0, 100, by = 25)) +
scale_x_discrete(labels = label_wrap(10)) +
coord_cartesian(ylim = c(-5, 120)) +
theme_minimal() +
theme(
axis.text.x = element_text(hjust = 0.5, size = 9),
legend.position = "none"
)# Combine plots with patchwork
combined_plot <- (p1 + p2) / (p3 + p4) +
plot_layout(guides = "collect") +
plot_annotation(
title = "National Route Preferences and Success Rates\nin High-Altitude Peak Expeditions (2020-2024)",
subtitle = "Bubble Size and Color Show Attempts, with Success Rates\nfor Top 3 Nations Across Four Most Popular Peaks",
caption = "Source: https://github.com/rfordatascience/tidytuesday",
theme = theme(
plot.title = element_text(face = "bold", size = 18, hjust = 0.2),
plot.subtitle = element_text(size = 14, hjust = 0.2),
plot.caption = element_text(size = 14)
)
) &
theme(
legend.position = "bottom",
legend.box = "horizontal",
legend.title.align = 0.5,
#legend.margin = margin(t = 5, b = 5),
legend.title = element_text(size = 14)
)
plot(combined_plot)